In [18]:
import pandas as pd
# Load the call detail records (CDR) export and preview the first rows.
cdr = pd.read_excel(io="cdr_data_export.xlsx")
cdr.head()
Out[18]:
Unfortunately, information about the towers' locations is missing!
We need a second data source from the DARKNET!
In [2]:
# Tower metadata; the first CSV column becomes the DataFrame index.
# NOTE(review): the path has no URL scheme, so pandas will resolve it as a
# relative local path rather than fetch it over the network — confirm intended.
towers = pd.read_csv(
    filepath_or_buffer="darknet.io/hacks/infrastructure/mobile_net/texas_towers.csv",
    index_col=0,
)
towers.head()
Out[2]:
In [3]:
# Left join (the default for DataFrame.join): every CDR row is kept and gains
# the tower columns whose index value matches that row's 'TowerID'.
call_data = cdr.join(other=towers, on='TowerID', how='left')
call_data.head()
Out[3]:
In [4]:
# The capture group in the pattern keeps the separator itself in the result,
# so a 'Call' value with one separator yields: text before, separator, text after.
call_parts = call_data['Call'].str.split("(->|-X)", expand=True)
call_data[['Caller', 'Symbol', 'Callee']] = call_parts
call_data.head()
Out[4]:
In [5]:
# Translate the separator symbol into a readable event label; symbols that
# are not keys of the mapping come out as NaN.
event_by_symbol = {"->": "Incoming", "-X": "Missed"}
call_data['Event'] = call_data['Symbol'].map(event_by_symbol)
call_data.head()
Out[5]:
In [6]:
# Keep every call in which the number below appears on either side; .copy()
# gives an independent frame that later cells can add columns to.
is_callee = call_data['Callee'] == '04638472273'
is_caller = call_data['Caller'] == '04638472273'
suspect_data = call_data[is_callee | is_caller].copy()
suspect_data.head()
Out[6]:
In [7]:
# Parse 'Start' into datetimes so the .dt accessor can be used on it later.
suspect_data['Start'] = pd.to_datetime(arg=suspect_data['Start'])
suspect_data.head()
Out[7]:
In [8]:
# Day-of-week name for each call start.
# Series.dt.weekday_name was removed in pandas 1.0; day_name() is its
# replacement and returns English names ('Monday' ... 'Sunday') by default,
# which the weekend filter further down relies on.
suspect_data['DoW'] = suspect_data['Start'].dt.day_name()
suspect_data.head()
Out[8]:
In [9]:
# Tower coordinates for all of the selected calls: longitude on x, latitude on y.
suspect_data.plot.scatter(x='TowerLon', y='TowerLat');
In [10]:
# Restrict to calls whose day-of-week label is Saturday or Sunday; .copy()
# so the 'hour' column can be added without touching suspect_data.
is_weekend = suspect_data['DoW'].isin(['Saturday', 'Sunday'])
suspect_on_weekend = suspect_data[is_weekend].copy()
suspect_on_weekend.head()
Out[10]:
In [11]:
# Same view as the previous scatter, limited to weekend calls
# (longitude on x, latitude on y).
suspect_on_weekend.plot.scatter(x='TowerLon', y='TowerLat');
In [12]:
# Hour of day (0-23) at which each weekend call started.
# (A stray bare `suspect_on_weekend['Start']` expression was removed: it was
# not the last line of the cell, so it was never displayed and had no effect.)
suspect_on_weekend['hour'] = suspect_on_weekend['Start'].dt.hour
suspect_on_weekend.head()
Out[12]:
In [13]:
# Night calls: start hour strictly below 6 or strictly above 22,
# i.e. hour values 0-5 and 23.
call_hour = suspect_on_weekend['hour']
is_night = (call_hour < 6) | (call_hour > 22)
suspect_on_weekend_night = suspect_on_weekend[is_night]
suspect_on_weekend_night.head()
Out[13]:
In [14]:
# Latitude is on x and longitude on y in this plot (the reverse of the two
# earlier scatters). The returned Axes is kept so a later cell can draw the
# cluster centre on the same figure.
ax = suspect_on_weekend_night.plot.scatter(x='TowerLat', y='TowerLon')
In [15]:
from sklearn.cluster import KMeans
# With a single cluster, the fitted centre is the mean tower position of the
# weekend-night calls. random_state pins the initialisation so re-runs stay
# reproducible even if n_clusters is raised later.
kmeans = KMeans(n_clusters=1, random_state=0)
data = suspect_on_weekend_night[['TowerLat', 'TowerLon']]
# Only the centres are needed, so fit() is enough; the labels returned by
# fit_predict() were being discarded.
kmeans.fit(data)
centroids = kmeans.cluster_centers_  # one row per cluster: [TowerLat, TowerLon]
In [16]:
# Overlay the cluster centre(s) as red crosses on the weekend-night scatter:
# column 0 of `centroids` goes on x, column 1 on y. Ending the cell with
# ax.figure re-displays the updated figure.
ax.scatter(centroids[:, 0], centroids[:, 1], c='r', marker='x')
ax.figure
Out[16]:
In [17]:
# Show the fitted cluster centre coordinates.
centroids
Out[17]: